Some of my emu optimizations have poor performance, and I want to make some plots to see if there is a bug I should be fixing.



In [58]:

    
%load_ext autoreload
%autoreload 2









    



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



In [59]:

    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()



In [60]:

    
from pearce.emulator import NashvilleHot
from itertools import product
from GPy.kern import *
import numpy as np
from os import path
from sys import argv
import h5py

training_file = '/u/ki/swmclau2/des/ds_hsab/PearceDsHSABCosmo.hdf5' assert path.isfile(training_file) test_file = '/u/ki/swmclau2/des/ds_hsab_test/PearceDsHSABCosmoTest.hdf5' assert path.isfile(test_file)



In [61]:

    
training_file = '/u/ki/swmclau2/des/wp_zheng07/PearceWpCosmo.hdf5'
assert path.isfile(training_file)
test_file = '/u/ki/swmclau2/des/wp_zheng07_test/PearceWpCosmoTest.hdf5'
assert path.isfile(test_file)



In [62]:

    
with h5py.File(training_file, 'r') as f:
    HOD_params = len(f.attrs['hod_param_names'])

emu.scale_bin_centers[0]



In [63]:

    
fixed_params = {'z':0.0}#, 'r':0.11972916}

cosmo_idx, hod_idx = 2,-1 cosmo_kernels = [Linear(input_dim=7, ARD=True), RBF(input_dim=7, ARD=True), Linear(input_dim=7, ARD=True) + RBF(input_dim=7, ARD=True), Linear(input_dim=7, ARD=True) + Matern32(input_dim=7, ARD=True), \ Matern32(input_dim=7, ARD=True)+RBF(input_dim=7, ARD=True) + Bias(input_dim=7)] HOD_kernels = [ Matern32(input_dim=HOD_params, ARD=True), RBF(input_dim=HOD_params, ARD=True) + Linear(input_dim=HOD_params, ARD=True), Matern32(input_dim=HOD_params, ARD=True)+RBF(input_dim=HOD_params, ARD=True) + Bias(input_dim=HOD_params)\ , RBF(input_dim=HOD_params, ARD=True) ] #k = (cosmo_kernels[3], HOD_kernels[0]) k = (cosmo_kernels[cosmo_idx], HOD_kernels[hod_idx])

hyperparams = hyperparams = {'kernel': k , \ 'optimize': True}



In [64]:

    
emu = NashvilleHot(training_file, fixed_params = fixed_params)#, downsample_factor = 0.1)

emu.save_as_default_kernel()



In [65]:

    
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)#, downsample_factor = 0.1)



In [66]:

    
data_y[:, 0]









    Out[66]:





array([3.90684245, 3.72096144, 3.52835696, 3.32642519, 3.11999331,
       2.91263403, 2.70806826, 2.50591129, 2.33700497, 2.206158  ,
       2.10133253, 1.99569879, 1.88636746, 1.76803183, 1.6412862 ,
       1.48958112, 1.31247608, 1.10681706])



In [67]:

    
pred_y[:,0]









    Out[67]:





array([3.9728706 , 3.73341016, 3.50564809, 3.29490581, 3.08958819,
       2.88766055, 2.67121124, 2.48032967, 2.31645286, 2.18949253,
       2.08574743, 1.98465552, 1.88545305, 1.76996831, 1.63719236,
       1.48487145, 1.31212241, 1.0972835 ])



In [68]:

    
plt.plot(emu.scale_bin_centers, ((10**pred_y - 10**data_y)/(10**data_y)).mean(axis=1), label= 'Bias')
plt.plot(emu.scale_bin_centers, (np.abs(10**pred_y - 10**data_y)/(10**data_y)).mean(axis =1), label = 'Acc')
plt.plot(emu.scale_bin_centers, np.zeros_like(emu.scale_bin_centers))
plt.xscale('log')
plt.legend(loc='best')
#plt.ylim([-0.05, 0.2])
plt.show()
# average over realizations



In [69]:

    
pred_y_rs= pred_y.reshape((len(emu.scale_bin_centers),5,7, -1), order = 'F')[:,0,:,:]
data_y_rs= data_y.reshape((len(emu.scale_bin_centers),5,7, -1), order = 'F').mean(axis = 1)
R = (10**pred_y_rs - 10**data_y_rs).reshape((18,-1), order = 'F')



In [70]:

    
cov = R.dot(R.T)/(R.shape[1]-1)
print 'Yerr', np.sqrt(np.diag(cov))/(10**data_y.mean(axis=1))
print '*'*10

#np.save(save_fname, cov)









    



Yerr [0.30164708 0.2349487  0.25888845 0.25966125 0.26440171 0.2106976
 0.10414359 0.09902543 0.08632132 0.04283271 0.03471622 0.02482874
 0.04026806 0.04538967 0.03783757 0.03792386 0.04448847 0.03968964]
**********



In [71]:

    
from pearce.mocks import cat_dict
np.random.seed(0)
boxno = np.random.randint(0,7)
hod_idx = np.random.randint(0,100)

realization = 0
cat = cat_dict['testbox'](boxno = boxno, realization = realization )#construct the specified catalog!



In [72]:

    
print hod_idx, boxno



In [73]:

    
with h5py.File(test_file, 'r') as f:
    hod_param_names = f.attrs['hod_param_names']
    hod_param_vals = f.attrs['hod_param_vals'][hod_idx]
    
    cosmo_param_names = f.attrs['cosmo_param_names']
    cosmo_param_vals = f.attrs['cosmo_param_vals'][boxno*5+realization]
    
    true_data = f['cosmo_no_%02d'%(boxno*5+realization)]['a_1.000']['obs'][hod_idx]

hod_params = dict(zip(hod_param_names, hod_param_vals))

#hod_params.update(dict(zip(cosmo_param_names, cosmo_param_vals)))



In [74]:

    
cpv = cat._get_cosmo_param_names_vals()

cat_val_dict =  {key: val for key, val in zip(cpv[0], cpv[1])}



In [75]:

    
true_param_dict = cat_val_dict.copy()
for hp, hv in hod_params.iteritems():
    if hp == 'logMmin':
        continue
    true_param_dict[hp] = hv
true_pred = emu.emulate_wrt_r(true_param_dict)[0]



In [76]:

    
rbc = emu.scale_bin_centers

plt.plot(rbc, 10**true_pred, label = 'Emu') plt.plot(rbc, true_data, label = 'NB Calculation') plt.plot(rbc, 10**test_y, label = 'Testbox') plt.legend(loc ='best') plt.loglog();

plt.plot(rbc, 10**true_pred/true_data, label = 'Emu') plt.plot(rbc, true_data/true_data, label = 'NB Calculation') plt.plot(rbc, 10**test_y/true_data, label = 'Testbox') plt.legend(loc ='best') plt.xscale('log');



In [77]:

    
fig = plt.figure(figsize=(10,7))

plt.plot(rbc, 10**true_pred/10**true_data,label = 'Emu at Truth', color ='k')
#plt.plot(rbc, 10**pop_xi.mean(axis = 0), label = 'Sim' )
#plt.errorbar(rbc, np.ones_like(true_data), yerr=yerr/true_data, label = 'Data')
plt.plot(rbc, np.ones_like(true_data), label = 'Data')

plt.xscale('log')
plt.legend(loc='best')
plt.show();



In [78]:

    
emu.get_param_names()









    Out[78]:





['ombh2',
 'omch2',
 'w0',
 'ns',
 'ln10As',
 'H0',
 'Neff',
 'logM1',
 'logM0',
 'sigma_logM',
 'alpha',
 'conc_gal_bias']



In [79]:

    
N = 100
cmap = sns.color_palette("BrBG_d", N)



In [80]:

    
fig = plt.figure(figsize=(10,7))
varied_pname = 'mean_occupation_centrals_assembias_param1'
lower, upper = emu.get_param_bounds(varied_pname)
i = 0
for c, val in zip(cmap, np.linspace(lower, upper, N) ):
    #print i, val
    #i+=1
    param_dict = true_param_dict.copy()
    param_dict[varied_pname] = val
    pred = emu.emulate_wrt_r(param_dict)[0]
    plt.plot(rbc, (10**pred-10**true_data[-len(emu.scale_bin_centers):])/10**true_data[-len(emu.scale_bin_centers):],\
             alpha = 0.5,label = val, color =c)
    
pred = emu.emulate_wrt_r(true_param_dict)[0]
plt.plot(rbc, (10**pred-10**true_data[-len(emu.scale_bin_centers):])/10**true_data[-len(emu.scale_bin_centers):], label = 'Truth', color = 'k')    
#plt.errorbar(rbc, np.zeros_like(true_data[-len(emu.scale_bin_centers):]), yerr=yerr/true_data[-len(emu.scale_bin_centers):], label = 'Data')
#plt.loglog()
plt.xscale('log')
#plt.legend(loc='best')
plt.show();









    



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-80-31011b75fa77> in <module>()
      1 fig = plt.figure(figsize=(10,7))
      2 varied_pname = 'mean_occupation_centrals_assembias_param1'
----> 3 lower, upper = emu.get_param_bounds(varied_pname)
      4 i = 0
      5 for c, val in zip(cmap, np.linspace(lower, upper, N) ):

/u/ki/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/emu.pyc in get_param_bounds(self, param)
    437             return self._ordered_params[param]
    438         except KeyError:
--> 439             raise KeyError("Parameter %s could not be found." % param)
    440 
    441     # TODO Should I unify some syntax between this and the one below?

KeyError: 'Parameter mean_occupation_centrals_assembias_param1 could not be found.'





    





<matplotlib.figure.Figure at 0x7f5d028bcfd0>



In [ ]:

    
train_y = (emu.y.T*emu._y_std + emu._y_mean).T



In [ ]:

    
test_y = data_y.reshape((18, 35, 100))



In [ ]:

    
plt.plot(emu.scale_bin_centers, emu._y_mean)
plt.plot(emu.scale_bin_centers, test_y.mean(axis = (1,2)))
plt.xscale('log')



In [ ]: